Graphs for pre-test survey analysis for city governments.
Necessary packages
Code
# Configure a CRAN mirror so package installation works non-interactively.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# One-time setup: uncomment to install any missing dependencies.
# install.packages(c("magrittr", "dplyr", "ggplot2", "knitr",
#                    "kableExtra", "survey", "plotly"))

# Attach the packages used throughout this analysis.
library(magrittr)
library(dplyr)
library(plotly)
library(ggplot2)
library(knitr)
library(kableExtra)
library(survey)
Survey design weights
Code
# Load the Cohort 3 background survey responses for city government staff.
# NOTE(review): absolute personal path — will only resolve on the author's
# machine; consider a project-relative path or here::here().
cohort3BackgroundGov <- read.csv(
  "~/Documents/R Projects/TOPC-impact-evaluation-dashboard/Cohort 3/cohort3BackgroundGov3.csv"
)

# Known team sizes (population) and completed responses (sample) per city.
population_counts <- c(Akron = 8, Detroit = 9, Macon = 10, Miami = 13)
sample_counts <- c(Akron = 7, Detroit = 1, Macon = 4, Miami = 3)

# Totals across all four cities.
total_population <- sum(population_counts)
total_sample <- sum(sample_counts)

# Post-stratification weight per city: population share / sample share.
# Cities over-represented in the sample (Akron) get weights < 1; cities
# under-represented (Detroit) get weights > 1.
weights <- (population_counts / total_population) / (sample_counts / total_sample)

# Show the calculated weights.
print(weights)
Akron Detroit Macon Miami
0.4285714 3.3750000 0.9375000 1.6250000
Code
# Treat single-PSU strata (Detroit, n = 1) as certainty units instead of
# erroring during variance estimation.
options(survey.lonely.psu = "certainty")

# Map the survey's verbatim city labels (column q4) onto short stratum names.
city_map <- list(
  "Miami-Dade County, FL" = "Miami",
  "Akron, OH"             = "Akron",
  "Detroit, MI"           = "Detroit",
  "Macon-Bibb County, GA" = "Macon"
)

# Recode each respondent's q4 value to its short city name.
cohort3BackgroundGov$city_mapped <- unlist(
  lapply(cohort3BackgroundGov$q4, function(label) city_map[[label]])
)

# Attach the post-stratification weight matching each respondent's city.
cohort3BackgroundGov$weights <- weights[cohort3BackgroundGov$city_mapped]

# Stratified design: one stratum per city, using the weights computed above.
design <- svydesign(
  ids = ~1,
  strata = ~city_mapped,
  weights = ~weights,
  data = cohort3BackgroundGov
)
Descriptive Data Analysis
Code
# NOTE(review): intentionally disabled — superseded by the per-question /
# per-stratum summary tables below. Retained in case per-question bar charts
# of weighted response frequencies are needed again.
#
# # Function to calculate and plot weighted frequencies for a single question
# analyze_likert_question <- function(question, design) {
#   formula <- as.formula(paste("~", question))
#   likert_distribution <- svytable(formula, design)
#   likert_df <- as.data.frame(likert_distribution)
#   names(likert_df) <- c("Response", "Frequency")
#
#   ggplot(likert_df, aes(x = Response, y = Frequency)) +
#     geom_bar(stat = "identity") +
#     labs(title = paste("Distribution of Responses for", question))
# }
#
# questions_to_analyze <- c("q8", "q9", "q10", "q11", "q13", "q14", "q15",
#                           "q16", "q17", "q18", "q19", "q21", "q22", "q23")
# plots <- lapply(questions_to_analyze, function(q) analyze_likert_question(q, design))
#
# # You can now view the plots for each question
# plots[[1]] # For example, to view the plot for the first question in the list
Per Question
For Akron, Macon and Miami Strata: We used the survey package functions like svyquantile and sqrt(svyvar) for calculating the median and standard deviation for strata other than Detroit. So, these statistics do take into account the sampling weights. The survey package is designed to incorporate sampling weights into its calculations, which means that the median and standard deviation for Akron (as well as for Macon and Miami) reflect the weighted characteristics of your sample.
For the Detroit Stratum: For the Detroit stratum, we calculated the median and standard deviation directly because of the single PSU issue. This direct calculation does not take into account the sampling weights since it bypasses the survey package. In cases where there’s only one PSU, the concept of weighting becomes less relevant, as there’s no variance within the stratum to adjust for. The summary statistics for Detroit are based solely on the available data for that stratum.
So, in summary:
For Akron, Macon, and Miami, the summary statistics listed are weighted and take into account the sampling design.
For Detroit, the statistics are unweighted due to the direct calculation method used.
This approach is consistent with handling complex survey data where some strata have limitations, such as having only one PSU. This method of handling the data aligns well with those challenges.
Code
# Calculate central tendency per question per stratum.
# Akron/Macon/Miami use the survey package (svyquantile / svyvar) so the
# statistics reflect the sampling weights; Detroit has a single PSU, so its
# statistics are computed directly (unweighted) from the raw responses.

# List of Likert questions to analyze.
questions_to_analyze <- c("q8", "q9", "q10", "q11", "q13", "q14", "q15", "q16",
                          "q17", "q18", "q19", "q21", "q22", "q23")
# Define the strata.
strata <- c("Akron", "Detroit", "Macon", "Miami")

# Pull the first numeric value out of whatever structure the survey helpers
# return (svyquantile returns a list/object, svyvar a matrix-like object).
first_number <- function(x) {
  x <- unlist(x, use.names = FALSE)
  if (is.null(x) || length(x) == 0) NA_real_ else as.numeric(x[[1]])
}

# Stats per question within each stratum: list(stratum -> question -> list(median, sd)).
all_strata_stats <- list()
for (stratum in strata) {
  stratum_design <- subset(design, city_mapped == stratum)
  stratum_data <- subset(cohort3BackgroundGov, city_mapped == stratum)
  stratum_stats <- list()
  for (question in questions_to_analyze) {
    if (stratum != "Detroit") {
      # Build the formula from the question name. The original used
      # ~get(question), which is fragile: the survey package may evaluate
      # the formula in an environment where `question` is not visible.
      question_formula <- as.formula(paste0("~", question))
      median_val <- first_number(
        svyquantile(question_formula, stratum_design, 0.5, na.rm = TRUE)
      )
      sd_val <- sqrt(first_number(
        svyvar(question_formula, stratum_design, na.rm = TRUE)
      ))
    } else {
      # Single-PSU stratum: direct, unweighted calculation.
      question_data <- stratum_data[[question]]
      if (all(is.na(question_data))) {
        median_val <- NA_real_
        sd_val <- NA_real_
      } else {
        median_val <- median(question_data, na.rm = TRUE)
        sd_val <- sd(question_data, na.rm = TRUE)
      }
    }
    stratum_stats[[question]] <- list(median = median_val, sd = sd_val)
  }
  all_strata_stats[[stratum]] <- stratum_stats
}

# Assemble one row per (stratum, question). Building a list of rows and
# rbind-ing once avoids the original's per-row growth of results_df, which
# coerced the numeric Median / SD columns to character via c() and was O(n^2).
rows <- vector("list", length(strata) * length(questions_to_analyze))
counter <- 1
for (stratum in strata) {
  for (question in questions_to_analyze) {
    stats <- all_strata_stats[[stratum]][[question]]
    rows[[counter]] <- data.frame(
      Stratum = stratum,
      Question = question,
      Median = first_number(stats$median),
      StandardDeviation = first_number(stats$sd),
      stringsAsFactors = FALSE
    )
    counter <- counter + 1
  }
}
results_df <- do.call(rbind, rows)

# Create the styled HTML table.
results_table <- kable(
  results_df,
  format = "html",
  col.names = c("Stratum", "Question", "Median", "Standard Deviation")
) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

# Print the table.
results_table
Stratum
Question
Median
Standard Deviation
Akron
q8
2
1.38013111868471
Akron
q9
2
1.38013111868471
Akron
q10
1
0.377964473009227
Akron
q11
3
1.38013111868471
Akron
q13
3
0.755928946018454
Akron
q14
2
0.951189731211342
Akron
q15
1
0.786795792469443
Akron
q16
3
1.52752523165195
Akron
q17
2
0.899735410842437
Akron
q18
1
0.755928946018454
Akron
q19
3
1.2724180205607
Akron
q21
3
1.4638501094228
Akron
q22
4
1.13389341902768
Akron
q23
2
1.0690449676497
Detroit
q8
3
NA
Detroit
q9
3
NA
Detroit
q10
2
NA
Detroit
q11
5
NA
Detroit
q13
5
NA
Detroit
q14
5
NA
Detroit
q15
2
NA
Detroit
q16
5
NA
Detroit
q17
4
NA
Detroit
q18
2
NA
Detroit
q19
4
NA
Detroit
q21
5
NA
Detroit
q22
4
NA
Detroit
q23
2
NA
Macon
q8
3
0.957427107756338
Macon
q9
3
0.577350269189626
Macon
q10
2
1.29099444873581
Macon
q11
4
0.816496580927726
Macon
q13
3
0.957427107756338
Macon
q14
3
1.29099444873581
Macon
q15
3
0.577350269189626
Macon
q16
4
0.5
Macon
q17
3
0.577350269189626
Macon
q18
2
1.5
Macon
q19
4
0.816496580927726
Macon
q21
3
0.957427107756338
Macon
q22
4
0.816496580927726
Macon
q23
4
0
Miami
q8
4
1
Miami
q9
4
0.577350269189626
Miami
q10
4
2.08166599946613
Miami
q11
3
2
Miami
q13
4
2.08166599946613
Miami
q14
4
1.73205080756888
Miami
q15
4
0.577350269189626
Miami
q16
4
0.577350269189626
Miami
q17
4
0.577350269189626
Miami
q18
5
0.577350269189626
Miami
q19
2
2.08166599946613
Miami
q21
2
2.08166599946613
Miami
q22
4
1.52752523165195
Miami
q23
3
1.15470053837925
Per City Government Team
The table below shows summary statistics for respondents from each city and county government team who rated their skills in community research, product stewardship, and partnerships using a Likert scale from 5 (Expertise in concept and terminology) to 1 (New to concept and terminology).
Detroit and Miami show higher self-assessed skill levels (median of 4), but Miami has more variability in responses, meaning there’s more diversity in how respondents rate their skills, ranging from moderate to high levels of familiarity. Akron has the lowest self-assessed skill level (median of 2) with less variability, suggesting a general consensus around a basic level of understanding, while Macon is in the middle (median of 3) with moderate variability. For Macon, there is a wider range of self-assessed skill levels, though still relatively clustered around the median.
1 - New to concept and terminology: The respondent has no previous knowledge or experience with the skill in question. They are entirely unfamiliar with the concepts or terminology.
2 - The respondent has very basic knowledge or understanding of the skill. They may recognize it but are not comfortable with it.
3 - The respondent has a moderate level of familiarity with the skill. They have some experience or understanding but may not feel fully proficient.
4 - The respondent is quite familiar and comfortable with the skill, although not at the highest level of proficiency.
5 - Expertise in concept and terminology: The respondent has a very high level of familiarity with the skill. They have comprehensive knowledge and experience, and they understand the concepts and terminology thoroughly.
Code
# Overall (across-question) central tendency per stratum.
strata <- c("Akron", "Detroit", "Macon", "Miami")

# Collapse one stratum's per-question stats into a single median/sd pair.
# NOTE(review): overall_sd is the SD *of the per-question SDs*, not a pooled
# standard deviation of the responses — confirm this is the intended summary
# before reusing these numbers elsewhere.
calculate_overall_stats <- function(stratum_stats) {
  combined_medians <- unlist(lapply(stratum_stats, function(x) x$median), use.names = FALSE)
  combined_sds <- unlist(lapply(stratum_stats, function(x) x$sd), use.names = FALSE)
  overall_median <- median(combined_medians, na.rm = TRUE)
  overall_sd <- sd(combined_sds, na.rm = TRUE)
  list(median = overall_median, sd = overall_sd)
}

overall_stratum_stats <- list()
for (stratum in strata) {
  if (stratum %in% names(all_strata_stats)) {
    overall_stratum_stats[[stratum]] <- calculate_overall_stats(all_strata_stats[[stratum]])
  } else {
    # Stratum absent from the stats list: record NA placeholders.
    overall_stratum_stats[[stratum]] <- list(median = NA, sd = NA)
  }
}

# Assemble a display table: one row per stratum. unname() and row.names = NULL
# prevent the named sapply() results from becoming row names, which made the
# original rendered table repeat each stratum label twice ("Akron  Akron ...").
overall_stats_df <- data.frame(
  Stratum = names(overall_stratum_stats),
  Median = unname(sapply(overall_stratum_stats, function(x) x$median)),
  StandardDeviation = unname(sapply(overall_stratum_stats, function(x) x$sd)),
  row.names = NULL,
  stringsAsFactors = FALSE
)

overall_stats_table <- kable(
  overall_stats_df,
  format = "simple",
  col.names = c("Stratum", "Overall Median", "Overall Standard Deviation")
)
print(overall_stats_table)
Stratum Overall Median Overall Standard Deviation
-------- -------- --------------- ---------------------------
Akron Akron 2 0.3392122
Detroit Detroit 4 NA
Macon Macon 3 0.3835532
Miami Miami 4 0.6726299
Participant Background
The survey had a total of 15 respondents, distributed across cities as follows:
Akron = 7
Detroit = 1
Macon = 4
Miami = 3
However, we applied weights in this survey analysis to adjust the results to compensate for different probabilities of selection, non-response, and to adjust the sample to known population totals. The weights we provided are the factor by which each city’s responses are to be multiplied to get a representation that is more reflective of the population and the quota sampling design of this study.
For example, to interpret the graph with weight counts for Akron, OH, the city has an original count of 7 respondents and a weight of 0.4285714. The weighted count would be 7 * 0.4285714. This would suggest that the weighted frequency count for Akron should be about 3 after rounding, which matches the bar graph.
Q7. Which city/county are you representing? [Format: City/County, State abbreviation (domestic)]
Code
# Weighted frequency of q4 (city/county represented), as a horizontal bar chart.
q4_frequency <- svytable(~q4, design = design)
q4_frequency_df <- as.data.frame(q4_frequency)

q4_pre_frequency_plot <- ggplot(q4_frequency_df, aes(x = q4, y = Freq)) +
  geom_bar(stat = "identity", fill = "#041e42", width = 0.7) +
  coord_flip() +
  labs(
    title = "Q7. Which city/county are you representing?",
    subtitle = paste("Sample size =", total_sample),
    x = "",
    y = "Number of Responses",
    caption = "Source: TOPC Cohort 3 Survey (2023)"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  )

print(q4_pre_frequency_plot)
Q6. Please describe your racial/ethnic identity. Select all that apply.
Code
# Weighted frequency of q3 (racial/ethnic identity), as a horizontal bar chart.
# Fix: the original title was copy-pasted from the Q8 job-title plot; it now
# matches the question actually being plotted (see the section header above).
q3_frequency <- svytable(~q3, design = design)
q3_pre_frequency_plot <- ggplot(as.data.frame(q3_frequency), aes(x = q3, y = Freq)) +
  geom_bar(stat = "identity", fill = "#041e42", width = 0.7) +
  coord_flip() +
  labs(
    title = "Q6. Please describe your racial/ethnic identity.",
    subtitle = paste("Sample size =", total_sample),
    x = "",
    y = "Number of Responses",
    caption = "Source: TOPC Cohort 3 Survey (2023)"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  )
print(q3_pre_frequency_plot)
Q8. Which title best describes your role and level of seniority at work?
Code
# Weighted frequency of q5 (role / seniority), as a horizontal bar chart.
q5_frequency <- svytable(~q5, design = design)
q5_frequency_df <- as.data.frame(q5_frequency)

q5_pre_frequency_plot <- ggplot(q5_frequency_df, aes(x = q5, y = Freq)) +
  geom_bar(stat = "identity", fill = "#041e42", width = 0.7) +
  coord_flip() +
  labs(
    title = "Q8. Which title best describes your role and level of seniority at work?",
    subtitle = paste("Sample size =", total_sample),
    x = "",
    y = "Number of Responses",
    caption = "Source: TOPC Cohort 3 Survey (2023)"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  )

print(q5_pre_frequency_plot)
Q10. How many years have you worked for your city or county?
Code
# Weighted frequency of q7 (years worked for the city/county), as a bar chart.
q7_frequency <- svytable(~q7, design = design)
q7_frequency_df <- as.data.frame(q7_frequency)

q7_pre_frequency_plot <- ggplot(q7_frequency_df, aes(x = q7, y = Freq)) +
  geom_bar(stat = "identity", fill = "#041e42", width = 0.7) +
  coord_flip() +
  labs(
    title = "Q10. How many years have you worked for your city or county?",
    subtitle = paste("Sample size =", total_sample),
    x = "",
    y = "Number of Responses",
    caption = "Source: TOPC Cohort 3 Survey (2023)"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  )

print(q7_pre_frequency_plot)
Q14. Synthesizing community research or feedback into insights or recommendations
Code
# Weighted frequency of q11 (skill: synthesizing community feedback).
# Fix: the original chart title had broken grammar ("believe they moderately
# or experienced"); corrected while keeping the same message.
q11_frequency <- svytable(~q11, design = design)
q11_pre_frequency_plot <- ggplot(as.data.frame(q11_frequency), aes(x = q11, y = Freq)) +
  geom_bar(stat = "identity", fill = "#041e42", width = 0.7) +
  coord_flip() +
  expand_limits(y = c(0, total_sample)) +
  labs(
    title = "Majority of respondents believe they are moderately or highly experienced in synthesizing community feedback into insights or recommendations.",
    subtitle = paste("Rating Scale: 5 - Expert to 1 - New (n =", total_sample, ")"),
    x = "",
    y = "Number of Responses",
    caption = "Source: TOPC Cohort 3 Survey (2023)"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  )
# Print the plot to display it in the document.
print(q11_pre_frequency_plot)
Code
# Save the q11 frequency plot alongside the rendered document.
ggsave("plots/q11_pre_frequency_plot.png", q11_pre_frequency_plot)

# Weighted cross-tabulation of q11 responses by city.
# Fixes: removed the dead q11_levels_ordered variable (computed twice, never
# used); repaired the garbled title string ("havepreviously", "experienced to
# this skill") that lost its line break and preposition.
q11_cross_tab_city <- svytable(~q11 + city_mapped, design = design)
q11_cross_tab_city_df <- as.data.frame(q11_cross_tab_city)
# Order the Likert levels explicitly so the stacked fill runs 1..5.
q11_cross_tab_city_df$q11 <- factor(q11_cross_tab_city_df$q11, levels = c("1", "2", "3", "4", "5"))

q11_pre_cross_tab_city_plot <- ggplot(q11_cross_tab_city_df,
                                      aes(x = city_mapped, y = Freq, fill = q11)) +
  geom_bar(stat = "identity", position = "stack", width = 0.7) +
  coord_flip() +
  expand_limits(y = c(0, total_sample)) +
  scale_fill_manual(values = c("5" = "#041e42", "4" = "#6eaddc", "3" = "#da291c",
                               "2" = "#1B786E", "1" = "#701C7F")) +
  labs(
    title = "The most experienced in this skill are city teams that have\npreviously participated in the TOPC program.",
    subtitle = paste("Rating Scale: 5 - Expert to 1 - New (n =", total_sample, ")"),
    x = "",
    y = "Number of Responses",
    caption = "Source: TOPC Cohort 3 Survey (2023)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  ) +
  guides(fill = guide_legend(reverse = TRUE, nrow = 1, byrow = TRUE))

print(q11_pre_cross_tab_city_plot)